import csv
from matplotlib import pyplot as plt,rcParams
def pretreat():
    """Trim the raw violations CSV to four columns and plot value counts.

    Copies columns 4, 12, 14 and 15 of the first 100001 rows (the header row
    plus up to 100000 data rows) of ``data/building-violations.csv`` into
    ``building-violations_100000.csv``. The trimmed file is then read back,
    the occurrences of every value are counted per column (the first row is
    skipped as the header), and one bar chart per column is shown.

    Raises:
        IndexError: if a row of the source file has fewer than 16 columns.
    """
    from collections import Counter

    src_path = "data/building-violations.csv"
    dst_path = "building-violations_100000.csv"
    columns = (4, 12, 14, 15)
    max_rows = 100001  # header row + 100000 data rows

    # Context managers close both files even if a row fails to parse;
    # newline="" leaves line-ending handling to the csv module.
    with open(src_path, "r", encoding="utf-8", newline="") as src, \
            open(dst_path, "w", encoding="utf-8", newline="") as dst:
        writer = csv.writer(dst)
        for cnt, row in enumerate(csv.reader(src), start=1):
            writer.writerow([row[c] for c in columns])
            if cnt >= max_rows:
                break

    with open(dst_path, "r", encoding="utf-8", newline="") as f:
        datas = list(csv.reader(f))

    titles = ['VIOLATION STATUS', 'INSPECTION STATUS',
              'INSPECTION CATEGORY', 'DEPARTMENT BUREAU']

    # One counter per column; Counter keeps first-seen order like a dict,
    # so the bars appear in the order values occur in the file.
    counters = [Counter() for _ in titles]
    for row in datas[1:]:  # skip the header row
        for counter, item in zip(counters, row):
            counter[item] += 1

    # Visualisation
    rcParams['font.sans-serif'] = ['SimHei']   # SimHei font so the Chinese labels render
    rcParams['axes.unicode_minus'] = False     # render the minus sign correctly
    for title, counter in zip(titles, counters):
        if not counter:
            # No data rows: nothing to plot (unpacking zip() of nothing would fail).
            continue
        x_data, y_data = zip(*counter.items())
        # Applied per chart and to the y axis only: each plt.show() may start a
        # new figure, and the categorical x axis has no scalar formatter.
        plt.ticklabel_format(style="plain", axis="y")
        plt.bar(x_data, y_data)
        plt.title(title)
        plt.xlabel("属性值")
        plt.ylabel("数量")
        plt.show()
def getData() -> list:
    """Load the trimmed CSV and encode each row as a transaction.

    Reads ``building-violations_100000.csv`` from the current directory and,
    for every row, keeps only the values that appear in the encoding table
    below, replacing each with its one-character code. Rows that contain none
    of those values (which includes the header row) are dropped.

    Returns:
        A list of transactions, each a non-empty list of code strings in the
        order the matching values appear in the row.
    """
    transfer = {'OPEN': '1', 'FAILED': '2', 'COMPLAINT': '3', 'CONSERVATION': '4'}
    D = []
    # The context manager closes the file; newline="" leaves line-ending
    # handling to the csv module.
    with open("building-violations_100000.csv", "r", encoding="utf-8", newline="") as f:
        for row in csv.reader(f):
            transaction = [transfer[item] for item in row if item in transfer]
            if transaction:
                D.append(transaction)
    return D